# Experiment output directory; created on first run if it does not exist yet.
exps=output/mind2web-qwen2-vl-RL
[ -d "${exps}" ] || mkdir -p "${exps}"
# NOTE: despite its name, log_dir holds the path of a log *file*
# (output.log inside the experiment directory), not a directory.
log_dir="${exps}/output.log"

# Training inputs. model_path and image_path are empty in this template;
# presumably they must be filled in before launching -- confirm with the
# training script's requirements.
model_path=""
image_path=""
dataset_name="../../data_config/mind2web.yaml"

# Environment exported to child processes.
# DEBUG_MODE / LOG_PATH: presumably read by the training code -- TODO confirm.
export DEBUG_MODE="true"
export LOG_PATH="${exps}/mind2web_qwen2-vl_cold_start_report_rewards.log"
# Placeholder Weights & Biases credentials/project: replace with real values.
export WANDB_API_KEY="Your wendb key"
export WANDB_PROJECT=""



# Launch GRPO training (grpo_rec.py) through torchrun with 8 processes per
# node, and append the combined stdout+stderr of the run to the log file
# named by ${log_dir} while still echoing it to the terminal.
#
# Reads: exps, model_path, dataset_name, image_path, log_dir (set earlier in
# this script).
#
# Notes:
#  - Every line of the command, including the last option line, must end in a
#    backslash; otherwise the "2>&1 | tee" line runs as a separate, empty
#    command and nothing from the training run reaches the log file.
#  - Expansions are quoted so that an empty or space-containing value is
#    passed as exactly one argument to its flag instead of vanishing or
#    splitting.
#  - The pipeline's exit status is that of tee, not of torchrun.
torchrun --nproc_per_node=8 \
    --master_port=12348 \
    grpo_rec.py \
    --deepspeed ../../local_scripts/zero3.json \
    --output_dir "${exps}" \
    --model_name_or_path "${model_path}" \
    --dataset_name "${dataset_name}" \
    --image_root "${image_path}" \
    --max_prompt_length 1024 \
    --max_completion_length 512 \
    --num_generations 12 \
    --per_device_train_batch_size 12 \
    --gradient_accumulation_steps 1 \
    --logging_steps 1 \
    --bf16 \
    --torch_dtype bfloat16 \
    --reward_funcs action_accuracy_type action_accuracy_position action_format format think_format \
    --data_seed 42 \
    --report_to wandb \
    --gradient_checkpointing true \
    --attn_implementation flash_attention_2 \
    --num_train_epochs 10 \
    --run_name "${exps}" \
    --save_steps 200 \
    --save_total_limit 2 \
    --save_only_model true \
    2>&1 | tee -a "${log_dir}"
